In dppalomar/fitHeavyTail: Mean and Covariance Matrix Estimation under Heavy Tails

library(knitr)
opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align = "center",
  fig.retina = 2,
  out.width = "85%",
  dpi = 96,
  pngquant = "--speed=1"
)
knitr_in_progress <- isTRUE(getOption('knitr.in.progress'))
knit_hooks$set(pngquant = hook_pngquant)
# rmarkdown::render("R_buildignore/tricks_on_nu.Rmd", "prettydoc::html_pretty")

Numerical Comparison with Existing Benchmarks

library(mvtnorm)
library(fitHeavyTail)
library(ggplot2)
library(reshape2)

# Comparison with other packages:
#   EstimateMoments_MinskerWei(X) is not good...
#   rrcov::

N <- 20
nu <- 5
mu <- rep(0, N)

set.seed(123)
U <- t(rmvnorm(n = round(1.1*N), sigma = 0.1*diag(N)))
Sigma <- U %*% t(U) + diag(N)
Sigma_scale <- (nu-2)/nu * Sigma
qplot(eigen(Sigma)$values, geom = "histogram", xlab = "eigenvalues", fill = I("cyan"), col = I("black"),
      main = "Histogram of eigenvalues of true covariance matrix")

Before everything, we show the default arguments for the function fit_mvt():

args(fit_mvt)

Fixed $\nu$ from different methods

# compute the MSE of estimated result with the reference one
MSE <- function(est_cov) norm(est_cov - Sigma, "F")^2
eval_single_res <- function(X) {
  c("MLE" = MSE(fit_mvt(X)$cov),
    "MLE (nu = 6)" = MSE(fit_mvt(X, nu = 6)$cov),
    "MLE (nu from kurtosis)" = MSE(fit_mvt(X, nu_regcoef = 1e10)$cov),
    "MLE (nu from first iteration)" = MSE(fit_mvt(X, nu_regcoef = 1e10, nu_target_first_iter = TRUE)$cov))
}

# make the simulation a function to simplfy the following simulation
sim_proc <- function() {
  N_realiz <- 200  # multiple realizations for averaging
  T_sweep <- round(seq(from = ceiling(1.5*N), to = 5*N, length.out = 12))

  if (!knitr_in_progress) pbar <- txtProgressBar(min = it<-0, max = length(T_sweep), style=3)
  MSE_all_T <- NULL
  for(T in T_sweep) {
    if (!knitr_in_progress) setTxtProgressBar(pbar, it<-it+1)
    res <- sapply(1:N_realiz, function(idx) {
      X <- rmvt(n = T, delta = mu, sigma = Sigma_scale, df = nu)
      eval_single_res(X)
    })
    MSE_all_T <- rbind(MSE_all_T, apply(res, 1, mean))
  }


  # MSE plots
  rownames(MSE_all_T) <- T_sweep
  ggplot(melt(MSE_all_T), aes(x = Var1, y = value, col = Var2, shape = Var2)) +
    geom_line() + geom_point() + coord_cartesian(ylim = c(0, 250)) +
    theme(legend.title = element_blank()) +
    ggtitle(bquote("MSE of covariance matrix estimation for heavy-tailed data (" * N == .(N) * "," ~ nu == .(nu)* ")")) +
    xlab("T") + ylab("MSE")
}

sim_proc()

I will choose $\nu$ from kurtosis as the target for regularization in the following part.

Try regularization function and also play with the coefficient

For temporary, we try two regularization function, $\lvert \nu-\nu_{target} \rvert$ and $\left( \nu-\nu_{target} \right)^2$. Besides, we try to play with the coefficient.

# absolute difference as regularization
eval_single_res <- function(X) {
  coefs <- 10 ^ seq(-5, 5)
  res <- c(MSE(fit_mvt(X)$cov), sapply(coefs, function(coef) MSE(fit_mvt(X, nu_regfun = abs, nu_regcoef = coef)$cov)))
  names(res) <- c("MLE", paste("MLE abs", coefs))
  res
}
sim_proc()

# square difference as regularization
squ <- function(x) x^2
eval_single_res <- function(X) {
  coefs <- 10 ^ seq(-5, 5)
  res <- c(MSE(fit_mvt(X)$cov), sapply(coefs, function(coef) MSE(fit_mvt(X, nu_regfun = squ, nu_regcoef = coef)$cov)))
  names(res) <- c("MLE", paste("MLE squ", coefs))
  res
}
sim_proc()

It seems the regularization can not even defeat the fixed way.

Try another shell for incorporating $\nu$ ($\frac{\nu}{\nu-2}$)

shell_fun <- function(x) x/(x-2)
# absolute difference as regularization
eval_single_res <- function(X) {
  coefs <- 10 ^ seq(-5, 5)
  res <- c(MSE(fit_mvt(X)$cov), sapply(coefs, function(coef) MSE(fit_mvt(X, nu_regfun = abs, nu_regcoef = coef, nu_shell = shell_fun)$cov)))
  names(res) <- c("MLE", paste("MLE abs", coefs))
  res
}
sim_proc()

# square difference as regularization
squ <- function(x) x^2
eval_single_res <- function(X) {
  coefs <- 10 ^ seq(-5, 5)
  res <- c(MSE(fit_mvt(X)$cov), sapply(coefs, function(coef) MSE(fit_mvt(X, nu_regfun = squ, nu_regcoef = coef, nu_shell = shell_fun)$cov)))
  names(res) <- c("MLE", paste("MLE squ", coefs))
  res
}
sim_proc()

It seems the regularization still does not work. The fixed nu performs the best always.

\setlength{\parindent}{-0.2in} \setlength{\leftskip}{0.2in} \setlength{\parskip}{8pt} \noindent

dppalomar/fitHeavyTail documentation built on June 5, 2023, 4:52 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

dppalomar/fitHeavyTail
Mean and Covariance Matrix Estimation under Heavy Tails

In dppalomar/fitHeavyTail: Mean and Covariance Matrix Estimation under Heavy Tails

Numerical Comparison with Existing Benchmarks

Fixed $\nu$ from different methods

Try regularization function and also play with the coefficient

Try another shell for incorporating $\nu$ ($\frac{\nu}{\nu-2}$)

R Package Documentation

Browse R Packages

We want your feedback!

dppalomar/fitHeavyTail Mean and Covariance Matrix Estimation under Heavy Tails

In dppalomar/fitHeavyTail: Mean and Covariance Matrix Estimation under Heavy Tails

Numerical Comparison with Existing Benchmarks

Fixed $\nu$ from different methods

Try regularization function and also play with the coefficient

Try another shell for incorporating $\nu$ ($\frac{\nu}{\nu-2}$)

R Package Documentation

Browse R Packages

We want your feedback!

dppalomar/fitHeavyTail
Mean and Covariance Matrix Estimation under Heavy Tails